#!/bin/bash
# Trace every command as it runs so the exact job invocation appears in the log.
set -o xtrace

# Hadoop launcher binary.
hadoop_exe="/home/a/libexec64/hadoop/current/bin/hadoop"
# Hadoop Streaming jar handed to `hadoop jar`.
hadoop_streaming_jar="/home/a/libexec64/hadoop/current/contrib/streaming/hadoop-streaming-0.20.2-cdh3u0.jar"

# Job input (a glob pattern, stored literally) and job output directory.
input_dir='/init/init_20120321/match/*'
output_dir='/test/matchGrep'
# Local file referenced only by the disabled getmerge/grep steps at the end of the script.
local_data='/home/wenxun.zhm/grep_tmp'

# Regular expression given to the mapper's grep; `^` anchors it to line start.
pattern1='^连衣裙'
# Alternatives kept for reference (currently disabled):
#pattern1="连衣裙"
#pattern2=91467180

# Remove any previous output before submitting the job. A failure here (for
# example when the directory does not exist yet) is tolerated and the script
# carries on. `${output_dir:?}` aborts instead of issuing a recursive delete
# with an empty path.
sudo -u ads "$hadoop_exe" fs -rmr "${output_dir:?}"

# Arguments for the streaming job. Building them as an array keeps each option
# a single word and allows per-option comments.
streaming_args=(
  # Map-only job: no reduce phase.
  -D mapred.reduce.tasks=0
  -D mapred.job.name="Grep Data"
  # grep exits non-zero when it finds no matching line; do not treat that as
  # a task failure.
  -jobconf stream.non.zero.exit.is.failure=false
  -inputformat org.apache.hadoop.mapred.SequenceFileAsTextInputFormat
  -outputformat org.apache.hadoop.mapred.SequenceFileOutputFormat
  # Quoted so the glob reaches Hadoop untouched instead of being expanded
  # (or word-split) by the local shell against the local filesystem.
  -input "$input_dir"
  -output "$output_dir"
  # The mapper is plain grep; the pattern is wrapped in single quotes inside
  # the mapper command string.
  -mapper "/bin/grep '${pattern1}'"
)

# Submit the job as user `ads`; its exit status becomes the script's status.
sudo -u ads "$hadoop_exe" jar "$hadoop_streaming_jar" "${streaming_args[@]}"
#-reducer /bin/cat 
#-outputformat org.apache.hadoop.mapred.SequenceFileOutputFormat \
#-outputformat org.apache.hadoop.mapred.TextOutputFormat \
#rm $local_data
#$hadoop_exe fs -getmerge $output_dir $local_data

#grep $pattern2 $local_data > $local_data.ad
